1 package org.apache.lucene.store;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.nio.MappedByteBuffer;
23 import java.nio.channels.ClosedChannelException; // javadoc @link
24 import java.nio.channels.FileChannel;
25 import java.nio.channels.FileChannel.MapMode;
26 import java.nio.file.Path;
27 import java.nio.file.StandardOpenOption;
28 import java.security.AccessController;
29 import java.security.PrivilegedAction;
30 import java.security.PrivilegedExceptionAction;
31 import java.security.PrivilegedActionException;
32 import java.util.Locale;
33 import java.util.concurrent.Future;
34 import java.lang.reflect.Method;
35
36 import org.apache.lucene.store.ByteBufferIndexInput.BufferCleaner;
37 import org.apache.lucene.util.Constants;
38 import org.apache.lucene.util.SuppressForbidden;
39
40 /** File-based {@link Directory} implementation that uses
41 * mmap for reading, and {@link
42 * FSDirectory.FSIndexOutput} for writing.
43 *
44 * <p><b>NOTE</b>: memory mapping uses up a portion of the
45 * virtual memory address space in your process equal to the
46 * size of the file being mapped. Before using this class,
47 * be sure your have plenty of virtual address space, e.g. by
48 * using a 64 bit JRE, or a 32 bit JRE with indexes that are
49 * guaranteed to fit within the address space.
50 * On 32 bit platforms also consult {@link #MMapDirectory(Path, LockFactory, int)}
51 * if you have problems with mmap failing because of fragmented
52 * address space. If you get an OutOfMemoryException, it is recommended
53 * to reduce the chunk size, until it works.
54 *
55 * <p>Due to <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">
56 * this bug</a> in Sun's JRE, MMapDirectory's {@link IndexInput#close}
57 * is unable to close the underlying OS file handle. Only when GC
58 * finally collects the underlying objects, which could be quite
59 * some time later, will the file handle be closed.
60 *
61 * <p>This will consume additional transient disk usage: on Windows,
62 * attempts to delete or overwrite the files will result in an
63 * exception; on other platforms, which typically have a "delete on
64 * last close" semantics, while such operations will succeed, the bytes
65 * are still consuming space on disk. For many applications this
66 * limitation is not a problem (e.g. if you have plenty of disk space,
67 * and you don't rely on overwriting files on Windows) but it's still
68 * an important limitation to be aware of.
69 *
70 * <p>This class supplies the workaround mentioned in the bug report
71 * (see {@link #setUseUnmap}), which may fail on
72 * non-Sun JVMs. It forcefully unmaps the buffer on close by using
73 * an undocumented internal cleanup functionality. If
74 * {@link #UNMAP_SUPPORTED} is <code>true</code>, the workaround
75 * will be automatically enabled (with no guarantees; if you discover
76 * any problems, you can disable it).
77 * <p>
78 * <b>NOTE:</b> Accessing this class either directly or
79 * indirectly from a thread while it's interrupted can close the
80 * underlying channel immediately if at the same time the thread is
81 * blocked on IO. The channel will remain closed and subsequent access
82 * to {@link MMapDirectory} will throw a {@link ClosedChannelException}. If
83 * your application uses either {@link Thread#interrupt()} or
84 * {@link Future#cancel(boolean)} you should use the legacy {@code RAFDirectory}
85 * from the Lucene {@code misc} module in favor of {@link MMapDirectory}.
86 * </p>
87 * @see <a href="http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html">Blog post about MMapDirectory</a>
88 */
89 public class MMapDirectory extends FSDirectory {
90 private boolean useUnmapHack = UNMAP_SUPPORTED;
91 private boolean preload;
92
93 /**
94 * Default max chunk size.
95 * @see #MMapDirectory(Path, LockFactory, int)
96 */
97 public static final int DEFAULT_MAX_CHUNK_SIZE = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28);
98 final int chunkSizePower;
99
100 /** Create a new MMapDirectory for the named location.
101 * The directory is created at the named location if it does not yet exist.
102 *
103 * @param path the path of the directory
104 * @param lockFactory the lock factory to use
105 * @throws IOException if there is a low-level I/O error
106 */
107 public MMapDirectory(Path path, LockFactory lockFactory) throws IOException {
108 this(path, lockFactory, DEFAULT_MAX_CHUNK_SIZE);
109 }
110
111 /** Create a new MMapDirectory for the named location and {@link FSLockFactory#getDefault()}.
112 * The directory is created at the named location if it does not yet exist.
113 *
114 * @param path the path of the directory
115 * @throws IOException if there is a low-level I/O error
116 */
117 public MMapDirectory(Path path) throws IOException {
118 this(path, FSLockFactory.getDefault());
119 }
120
121 /** Create a new MMapDirectory for the named location and {@link FSLockFactory#getDefault()}.
122 * The directory is created at the named location if it does not yet exist.
123 *
124 * @param path the path of the directory
125 * @param maxChunkSize maximum chunk size (default is 1 GiBytes for
126 * 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping.
127 * @throws IOException if there is a low-level I/O error
128 */
129 public MMapDirectory(Path path, int maxChunkSize) throws IOException {
130 this(path, FSLockFactory.getDefault(), maxChunkSize);
131 }
132
133 /**
134 * Create a new MMapDirectory for the named location, specifying the
135 * maximum chunk size used for memory mapping.
136 * The directory is created at the named location if it does not yet exist.
137 *
138 * @param path the path of the directory
139 * @param lockFactory the lock factory to use, or null for the default
140 * ({@link NativeFSLockFactory});
141 * @param maxChunkSize maximum chunk size (default is 1 GiBytes for
142 * 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping.
143 * <p>
144 * Especially on 32 bit platform, the address space can be very fragmented,
145 * so large index files cannot be mapped. Using a lower chunk size makes
146 * the directory implementation a little bit slower (as the correct chunk
147 * may be resolved on lots of seeks) but the chance is higher that mmap
148 * does not fail. On 64 bit Java platforms, this parameter should always
149 * be {@code 1 << 30}, as the address space is big enough.
150 * <p>
151 * <b>Please note:</b> The chunk size is always rounded down to a power of 2.
152 * @throws IOException if there is a low-level I/O error
153 */
154 public MMapDirectory(Path path, LockFactory lockFactory, int maxChunkSize) throws IOException {
155 super(path, lockFactory);
156 if (maxChunkSize <= 0) {
157 throw new IllegalArgumentException("Maximum chunk size for mmap must be >0");
158 }
159 this.chunkSizePower = 31 - Integer.numberOfLeadingZeros(maxChunkSize);
160 assert this.chunkSizePower >= 0 && this.chunkSizePower <= 30;
161 }
162
163 /**
164 * <code>true</code>, if this platform supports unmapping mmapped files.
165 */
166 public static final boolean UNMAP_SUPPORTED = AccessController.doPrivileged(new PrivilegedAction<Boolean>() {
167 @Override
168 @SuppressForbidden(reason = "Java 9 Jigsaw whitelists access to sun.misc.Cleaner, so setAccessible works")
169 public Boolean run() {
170 try {
171 Class<?> clazz = Class.forName("java.nio.DirectByteBuffer");
172 Method method = clazz.getMethod("cleaner");
173 method.setAccessible(true);
174 return true;
175 } catch (Exception e) {
176 return false;
177 }
178 }
179 });
180
181 /**
182 * This method enables the workaround for unmapping the buffers
183 * from address space after closing {@link IndexInput}, that is
184 * mentioned in the bug report. This hack may fail on non-Sun JVMs.
185 * It forcefully unmaps the buffer on close by using
186 * an undocumented internal cleanup functionality.
187 * <p><b>NOTE:</b> Enabling this is completely unsupported
188 * by Java and may lead to JVM crashes if <code>IndexInput</code>
189 * is closed while another thread is still accessing it (SIGSEGV).
190 * @throws IllegalArgumentException if {@link #UNMAP_SUPPORTED}
191 * is <code>false</code> and the workaround cannot be enabled.
192 */
193 public void setUseUnmap(final boolean useUnmapHack) {
194 if (useUnmapHack && !UNMAP_SUPPORTED)
195 throw new IllegalArgumentException("Unmap hack not supported on this platform!");
196 this.useUnmapHack=useUnmapHack;
197 }
198
199 /**
200 * Returns <code>true</code>, if the unmap workaround is enabled.
201 * @see #setUseUnmap
202 */
203 public boolean getUseUnmap() {
204 return useUnmapHack;
205 }
206
207 /**
208 * Set to {@code true} to ask mapped pages to be loaded
209 * into physical memory on init. The behavior is best-effort
210 * and operating system dependent.
211 * @see MappedByteBuffer#load
212 */
213 public void setPreload(boolean preload) {
214 this.preload = preload;
215 }
216
217 /**
218 * Returns {@code true} if mapped pages should be loaded.
219 * @see #setPreload
220 */
221 public boolean getPreload() {
222 return preload;
223 }
224
225 /**
226 * Returns the current mmap chunk size.
227 * @see #MMapDirectory(Path, LockFactory, int)
228 */
229 public final int getMaxChunkSize() {
230 return 1 << chunkSizePower;
231 }
232
233 /** Creates an IndexInput for the file with the given name. */
234 @Override
235 public IndexInput openInput(String name, IOContext context) throws IOException {
236 ensureOpen();
237 Path path = directory.resolve(name);
238 try (FileChannel c = FileChannel.open(path, StandardOpenOption.READ)) {
239 final String resourceDescription = "MMapIndexInput(path=\"" + path.toString() + "\")";
240 final boolean useUnmap = getUseUnmap();
241 return ByteBufferIndexInput.newInstance(resourceDescription,
242 map(resourceDescription, c, 0, c.size()),
243 c.size(), chunkSizePower, useUnmap ? CLEANER : null, useUnmap);
244 }
245 }
246
247 /** Maps a file into a set of buffers */
248 final ByteBuffer[] map(String resourceDescription, FileChannel fc, long offset, long length) throws IOException {
249 if ((length >>> chunkSizePower) >= Integer.MAX_VALUE)
250 throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + resourceDescription);
251
252 final long chunkSize = 1L << chunkSizePower;
253
254 // we always allocate one more buffer, the last one may be a 0 byte one
255 final int nrBuffers = (int) (length >>> chunkSizePower) + 1;
256
257 ByteBuffer buffers[] = new ByteBuffer[nrBuffers];
258
259 long bufferStart = 0L;
260 for (int bufNr = 0; bufNr < nrBuffers; bufNr++) {
261 int bufSize = (int) ( (length > (bufferStart + chunkSize))
262 ? chunkSize
263 : (length - bufferStart)
264 );
265 MappedByteBuffer buffer;
266 try {
267 buffer = fc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize);
268 } catch (IOException ioe) {
269 throw convertMapFailedIOException(ioe, resourceDescription, bufSize);
270 }
271 if (preload) {
272 buffer.load();
273 }
274 buffers[bufNr] = buffer;
275 bufferStart += bufSize;
276 }
277
278 return buffers;
279 }
280
281 private IOException convertMapFailedIOException(IOException ioe, String resourceDescription, int bufSize) {
282 final String originalMessage;
283 final Throwable originalCause;
284 if (ioe.getCause() instanceof OutOfMemoryError) {
285 // nested OOM confuses users, because it's "incorrect", just print a plain message:
286 originalMessage = "Map failed";
287 originalCause = null;
288 } else {
289 originalMessage = ioe.getMessage();
290 originalCause = ioe.getCause();
291 }
292 final String moreInfo;
293 if (!Constants.JRE_IS_64BIT) {
294 moreInfo = "MMapDirectory should only be used on 64bit platforms, because the address space on 32bit operating systems is too small. ";
295 } else if (Constants.WINDOWS) {
296 moreInfo = "Windows is unfortunately very limited on virtual address space. If your index size is several hundred Gigabytes, consider changing to Linux. ";
297 } else if (Constants.LINUX) {
298 moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'), and 'sysctl vm.max_map_count'. ";
299 } else {
300 moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'). ";
301 }
302 final IOException newIoe = new IOException(String.format(Locale.ENGLISH,
303 "%s: %s [this may be caused by lack of enough unfragmented virtual address space "+
304 "or too restrictive virtual memory limits enforced by the operating system, "+
305 "preventing us to map a chunk of %d bytes. %sMore information: "+
306 "http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html]",
307 originalMessage, resourceDescription, bufSize, moreInfo), originalCause);
308 newIoe.setStackTrace(ioe.getStackTrace());
309 return newIoe;
310 }
311
312 private static final BufferCleaner CLEANER = new BufferCleaner() {
313 @Override
314 public void freeBuffer(final ByteBufferIndexInput parent, final ByteBuffer buffer) throws IOException {
315 try {
316 AccessController.doPrivileged(new PrivilegedExceptionAction<Void>() {
317 @Override
318 @SuppressForbidden(reason = "Java 9 Jigsaw whitelists access to sun.misc.Cleaner, so setAccessible works")
319 public Void run() throws Exception {
320 final Method getCleanerMethod = buffer.getClass()
321 .getMethod("cleaner");
322 getCleanerMethod.setAccessible(true);
323 final Object cleaner = getCleanerMethod.invoke(buffer);
324 if (cleaner != null) {
325 cleaner.getClass().getMethod("clean")
326 .invoke(cleaner);
327 }
328 return null;
329 }
330 });
331 } catch (PrivilegedActionException e) {
332 throw new IOException("Unable to unmap the mapped buffer: " + parent.toString(), e.getCause());
333 }
334 }
335 };
336 }